# NOTE(review): when run at top level this removes every visible object from
# the global environment, including objects created by a calling script that
# source()s this file. Prefer running the script in a fresh R session.
rm(list = ls())

## Trends in per-capita municipal spending around the first year the census
## was applied (Figure A.4)

library(tidyverse)
library(readxl)

## Get municipal finance data

census <- read_rds('data/muni_finance.rds') %>%
  mutate(state_id = factor(state_id))

## Rename states
##
## The labels below are matched to the sorted factor levels purely by
## position (presumably the official state-key order, 01 = Schleswig-Holstein
## ... 16 = Thueringen -- confirm against the data). `levels<-` silently
## accepts a factor with fewer levels and would then attach the wrong names,
## so fail loudly if the data do not contain exactly 16 states.

stopifnot(nlevels(census$state_id) == 16L)

levels(census$state_id) <- c('S-H', 'HH', 'NDS', 'Bremen', 'NRW',
                             'Hessen', 'RP', 'BW', 'Bayern', 'Saarland',
                             'Berlin', 'Brandenburg', 'M-V', 'Sachsen',
                             'Sachsen-Anhalt', 'Thueringen')

## Normalize all the spending variables by the pre-census population

# Names of every column whose name contains 'spending_total_'
bt_tvars <- str_subset(colnames(census), 'spending_total_')

## Divide all of them by the pre-census population
##
## List-style indexing (`census[bt_tvars]`) plus lapply() yields one plain
## vector per column for both data.frames and tibbles. The previous
## sapply() / `census[, v]` combination depended on `[, v]` dropping to a
## vector, which tibbles do not do, and on sapply()'s input-dependent
## simplification.

census[bt_tvars] <- lapply(census[bt_tvars], function(spending) {
  spending / census$bt_pop_2012
})

## Reshape to one row per unit and year, then add treatment/post indicators

plot_df <- census %>%
  # '_(?!.*_)' matches only the last underscore, so e.g. 'bt_pop_2012' is
  # split into name = 'bt_pop' and year = '2012'
  pivot_longer(
    cols = matches('bt_'),
    names_to = c('name', 'year'),
    names_sep = '_(?!.*_)'
  ) %>%
  # Spread the variable names back out so each 'bt_*' series is a column
  pivot_wider(names_from = 'name', values_from = 'value') %>%
  mutate(year = as.numeric(year)) %>%
  filter(pop_dec_09 >= 0, pop_dec_09 <= 20000) %>%
  mutate(
    # 1 for years after 2013, 0 otherwise
    post = as.numeric(year > 2013),
    # NOTE(review): `as.numeric(treated) - 1` assumes `treated` is a two-level
    # factor whose integer codes are 1/2 -- TODO confirm
    treated_post = (as.numeric(treated) - 1) * post
  )

## Get info on census application

# State names are shortened to the same labels used for `census$state_id`,
# because the two tables are joined on that label below.
# NOTE(review): Hamburg is labelled 'HH' in the census data but was the only
# abbreviated state missing from this recode, so it could never match in the
# join. This assumes the file stores the full name 'Hamburg' like the other
# states; recode() leaves the column untouched if that value does not occur.
states <- read_rds("data/states_census.rds") %>%
  dplyr::select(state_name, applies_census, census_first_year) %>%
  mutate(state_name = dplyr::recode(state_name, `Schleswig-Holstein` = 'S-H',
                                    `Hamburg` = 'HH',
                                    `Niedersachsen` = 'NDS',
                                    `Nordrhein-Westfalen` = 'NRW',
                                    `Thüringen` = 'Thueringen',
                                    `Baden-Württemberg` = 'BW',
                                    `Mecklenburg-Vorpommern` = 'M-V',
                                    `Rheinland-Pfalz` = 'RP'))

## Merge to main data (keeps every row of plot_df; unmatched states get NA)

plot_df <- left_join(plot_df, states,
                     by = c('state_id' = 'state_name'))

## Event time relative to the first year the census was applied

# There is no period 0: the first census year and every later year are
# shifted up by one, so the sequence runs ..., -2, -1, 1, 2, ...
plot_df <- plot_df %>%
  filter(!is.na(year)) %>%
  mutate(
    time_rel = year - census_first_year,
    time_rel = time_rel + as.numeric(time_rel > -1),
    # 1 from the first census year onwards, 0 before
    post_rel = as.numeric(time_rel > -1),
    # NOTE(review): assumes `treated` is a two-level factor coded 1/2 --
    # TODO confirm
    treated_post_rel = (as.numeric(treated) - 1) * post_rel
  )

## Declare outcomes

outcomes <- "bt_spending_total"

#### Conditional on time periods pre/post census ####

## Mean per-capita spending by treatment group and relative year, for units
## with a population between 9,000 and 11,000 in states that apply the census

plot_df_agg <- plot_df %>%
  filter(between(pop_dec_09, 9000, 11000)) %>%
  # Baden-Wuerttemberg is labelled 'BW' throughout this script; the previous
  # comparison against 'B-W' matched nothing, so the exclusion never happened
  filter(state_id != 'BW') %>%
  filter(applies_census == 1) %>%
  group_by(treated, time_rel) %>%
  summarise(m = mean(bt_spending_total, na.rm = TRUE)) %>%
  ungroup()

# Figure A.4: trends in spending ----

# One line per group over relative years -3..3. There is no period 0 in
# `time_rel`, so the dotted vertical line at 0 falls between the last
# pre-census year (-1) and the first census year (1).
plot_df_agg %>%
  mutate(treated = ifelse(treated == 1,
                          'Population below 10,000 (treated)',
                          'Population above 10,000 (control)')) %>%
  # Restricting to [-3, 3] already drops period 4, so no separate filter
  filter(between(time_rel, -3, 3)) %>%
  # Aesthetics after x and y must be named; the grouping variable was
  # previously passed as a stray unnamed third argument
  ggplot(aes(time_rel, m, group = treated)) +
  geom_line(aes(linetype = factor(treated))) +
  geom_point(aes(shape = factor(treated)),
             fill = 'white') +
  geom_vline(xintercept = 0, linetype = 'dotted') +
  theme_bw() +
  # Same (empty) title on both scales so the legends are drawn as one
  scale_linetype_discrete(name = '') +
  scale_shape_manual(name = '', values = c(21, 22)) +
  ylab('Spending / capita (EUR)') +
  xlab('Year relative to first year census was applied') +
  theme(legend.position = 'bottom') +
  scale_x_continuous(breaks = -3:3)

